* Builds a crosswalk from 2014 district codes (ubigeo2014) to 1993 district
* codes (ubigeo1993) and saves it as ubigeo2014to1993 in the ubigeo folder.
clear all
set more off

* `temp' holds the prepared 1993 district list that is later used as the
* "using" file of the record linkage.
tempfile temp

** PATHS
* Folder with the input ubigeo files; the output crosswalk is written here too.
local ubigeo ../../ubigeo

** DATA: 1993 district list
* Input has one row per code with the columns coddpto, codprov, coddist and
* nombre. Presumably department and province header rows carry "00" in the
* lower-level code fields -- TODO confirm against the raw file.
use `ubigeo'/ubigeo1993

* Within each department (and each province) the first row in code order
* supplies the name that is copied down to every row of that group.
bysort coddpto (codprov coddist): gen DEPTname = nombre[1]
bysort coddpto codprov (coddist): gen PROVname = nombre[1]
gen DISTname = nombre

foreach v of varlist DEPTname PROVname DISTname {
    // strip leading/trailing blanks
    replace `v' = trim(`v')
}

* Keep only rows with a real district code and build the 6-character ubigeo.
drop if coddist == "00"
gen ubigeo1993 = coddpto + codprov + coddist
drop coddpto codprov coddist nombre

* The yen sign stands in for N-tilde in this file (presumably a code-page
* artefact -- confirm); restore it so names compare with the 2014 list.
foreach v of varlist DISTname PROVname {
    replace `v' = subinstr(`v',"¥","Ñ",.)
}

* Row id used as idusing() in the record linkage.
gen y = _n
save `temp', replace


* 2014 district list.
* departamento / provincia / distrito each hold a 2-character code followed by
* the name: characters 1-2 are taken as the code, characters 3 onward (up to 70)
* as the name.
import delimited `ubigeo'/ubigeo2014.csv, varnames(1) clear
gen coddpto = substr(departamento,1,2)
gen codprov = substr(provincia,1,2)
gen coddist = substr(distrito,1,2)

gen DEPTname = substr(departamento,3,70)
gen PROVname = substr(provincia,3,70)
gen DISTname = substr(distrito,3,70)

* Rows whose district code is not numeric are dropped.
drop if real(coddist) == .

* Normalise names so they compare with the 1993 list: collapse internal blanks,
* trim, and upper-case.
foreach v of varlist DEPTname PROVname DISTname {
    replace `v' = upper(trim(itrim(`v')))
}

* N-tilde variants are unified for province and district names only, exactly
* as in the original script (department names are left untouched here).
foreach v of varlist PROVname DISTname {
    replace `v' = subinstr(`v',"¥","Ñ",.)
    replace `v' = subinstr(`v',"ñ","Ñ",.)
}

* Lower-case accented vowels are mapped to plain capitals.
* NOTE(review): upper() presumably leaves non-ASCII letters unchanged, which is
* why these are handled explicitly; upper-case accented vowels are not
* normalised -- confirm the CSV contains none.
foreach v of varlist DEPTname PROVname DISTname {
    replace `v' = subinstr(`v',"á","A",.)
    replace `v' = subinstr(`v',"é","E",.)
    replace `v' = subinstr(`v',"í","I",.)
    replace `v' = subinstr(`v',"ó","O",.)
    replace `v' = subinstr(`v',"ú","U",.)
}

gen ubigeo2014 = coddpto + codprov + coddist

* Name the code variable explicitly: the previous `keep ubigeo' only worked
* through variable-name abbreviation and breaks under `set varabbrev off' or
* when another variable starting with "ubigeo" exists.
keep ubigeo2014 *name*

* Row id used as idmaster() in the record linkage.
gen x = _n

* Fuzzy-link the 2014 list (master, id x) to the prepared 1993 list (using,
* id y) on department, province and district names; the match score is stored
* in `score'. reclink is a user-written command and must be installed.
reclink DEPTname PROVname DISTname using `temp', ///
    idmaster(x) idusing(y) gen(score)
tabulate _merge

* Convert the string codes to numbers and keep a working copy of the 2014 code.
destring ubigeo*, replace
generate ubigeohost = ubigeo2014

* Best scores first, for eyeballing.
gsort -score ubigeohost

* dd == 1 when the district names agree exactly. UDISTname is presumably the
* 1993 district name as returned by reclink -- confirm in the reclink help.
generate dd = (DISTname == UDISTname)

* 2014 districts with _merge == 1; these are handled by hand below.
tabulate ubigeohost if _merge == 1

* Manual assignments for 2014 districts that the record linkage left without a
* 1993 partner (_merge == 1; presumably "master only" -- confirm in the reclink
* help). Each line sets ubigeo1993 for one 2014 code (ubigeohost); the trailing
* comment reads "<2014 district> to <1993 district>".
* The codes were destringed earlier, so literals with a leading zero
* (e.g. 010201) compare as plain numbers; the zero is kept for readability.
* The first two digits of a code are the department, the next two the province.
replace ubigeo1993 = 010201 if ubigeohost == 010201 & _merge == 1 // bagua to la peca
replace ubigeo1993 = 021801 if ubigeohost == 021809 & _merge == 1 // nuevo chimbote to old chimbote
replace ubigeo1993 = 030202 if ubigeohost == 030219 & _merge == 1 // NOTE(review): original comment "nuevo chimbote to old chimbote" repeats the previous line, but these codes are in dept 03 -- confirm districts
replace ubigeo1993 = 030201 if ubigeohost == 030220 & _merge == 1 // arguedas to andahuaylas
replace ubigeo1993 = 030501 if ubigeohost == 030506 & _merge == 1 // challhuahuacho to tambobamba
replace ubigeo1993 = 030706 if ubigeohost == 030714 & _merge == 1 // curasco to micaela bastidas
replace ubigeo1993 = 040112 if ubigeohost == 040129 & _merge == 1 // Jose L Bust. y Rivero to paucarpata
replace ubigeo1993 = 040511 if ubigeohost == 040520 & _merge == 1 // majes to lluta
replace ubigeo1993 = 050101 if ubigeohost == 050115 & _merge == 1 // jesus nazareno to ayacucho
replace ubigeo1993 = 050101 if ubigeohost == 050116 & _merge == 1 // a. a. caceres to ayacucho
replace ubigeo1993 = 050407 if ubigeohost == 050408 & _merge == 1 // llochegua to sivia
replace ubigeo1993 = 050402 if ubigeohost == 050409 & _merge == 1 // canayre to ayahuanco
replace ubigeo1993 = 050402 if ubigeohost == 050410 & _merge == 1 // uchuraccay to ayahuanco
replace ubigeo1993 = 050402 if ubigeohost == 050411 & _merge == 1 // pucacolpa to ayahuanco
replace ubigeo1993 = 050402 if ubigeohost == 050412 & _merge == 1 // chanta to ayahuanco
replace ubigeo1993 = 050502 if ubigeohost == 050510 & _merge == 1 // anchihyay to anco
replace ubigeo1993 = 050501 if ubigeohost == 050509 & _merge == 1 // samugari to san miguel
replace ubigeo1993 = 060307 if ubigeohost == 060312 & _merge == 1 // la libertad de pallan to miguel iglesias
replace ubigeo1993 = 060413 if ubigeohost == 060419 & _merge == 1 // chalamarca to paccha
replace ubigeo1993 = 070106 if ubigeohost == 070107 & _merge == 1 // mi peru to ventanilla
replace ubigeo1993 = 080806 if ubigeohost == 080808 & _merge == 1 // alto pichigua to pichigua
replace ubigeo1993 = 090101 if ubigeohost == 090118 & _merge == 1 // ascension to huancavelica
replace ubigeo1993 = 090502 if ubigeohost == 090511 & _merge == 1 // cosme to anco
replace ubigeo1993 = 100302 if ubigeohost == 101104 & _merge == 1 // aparicio pomares to chupa
replace ubigeo1993 = 100306 if ubigeohost == 101108 & _merge == 1 // choras to chavinillo
replace ubigeo1993 = 100702 if ubigeohost == 100704 & _merge == 1 // la morada to cholon
replace ubigeo1993 = 100702 if ubigeohost == 100705 & _merge == 1 // sta rosa de jaracanja to cholon
replace ubigeo1993 = 120601 if ubigeohost == 120609 & _merge == 1 // vizctan del ene to satipo
replace ubigeo1993 = 130201 if ubigeohost == 130208 & _merge == 1 // casa grande to ascope
replace ubigeo1993 = 130112 if ubigeohost == 131202 & _merge == 1 // cguadalupito to old viru
replace ubigeo1993 = 130112 if ubigeohost == 131203 & _merge == 1 // viru to old viru
replace ubigeo1993 = 140115 if ubigeohost == 140116 & _merge == 1 // patapo to sana
replace ubigeo1993 = 140115 if ubigeohost == 140117 & _merge == 1 // pucala to sana
replace ubigeo1993 = 140115 if ubigeohost == 140119 & _merge == 1 // cayalti to sana
replace ubigeo1993 = 140101 if ubigeohost == 140118 & _merge == 1 // pomalca to cix
replace ubigeo1993 = 140101 if ubigeohost == 140120 & _merge == 1 // tuman to cix
replace ubigeo1993 = 160401 if ubigeohost == 160404 & _merge == 1 // san pablo to ramon castill
replace ubigeo1993 = 160507 if ubigeohost == 160510 & _merge == 1 // jenaro herrera to saquena
replace ubigeo1993 = 160209 if ubigeohost == 160706 & _merge == 1  // andoas to pastaza
replace ubigeo1993 = 160109 if ubigeohost == 160802 & _merge == 1  // ... to putumayo
replace ubigeo1993 = 160109 if ubigeohost == 160803 & _merge == 1  // ... to putumayo
replace ubigeo1993 = 160109 if ubigeohost == 160804 & _merge == 1  // ... to putumayo
replace ubigeo1993 = 170101 if ubigeohost == 170104 & _merge == 1 // laberinto to tambopata
replace ubigeo1993 = 170201 if ubigeohost == 170204 & _merge == 1 // huetetopue to manu
replace ubigeo1993 = 190306 if ubigeohost == 190308 & _merge == 1 // NOTE(review): original comment "huetetopue to manu" repeats the previous line, but these codes are in dept 19 -- confirm districts
replace ubigeo1993 = 211201 if ubigeohost == 211209 & _merge == 1 // alto inambaro to sandia
replace ubigeo1993 = 211207 if ubigeohost == 211210 & _merge == 1 // putina to san juan
replace ubigeo1993 = 230101 if ubigeohost == 230110 & _merge == 1 // albarracin to tacna
replace ubigeo1993 = 230101 if ubigeohost == 230111 & _merge == 1 // yarada palos to tacna
replace ubigeo1993 = 240201 if ubigeohost == 240203 & _merge == 1 // NOTE(review): original comment "albarracin to tacna" repeats the one two lines above, but these codes are in dept 24 -- confirm districts
replace ubigeo1993 = 250301 if ubigeohost == 250303 & _merge == 1 // curimana to p abad
replace ubigeo1993 = 250302 if ubigeohost == 250304 & _merge == 1 // neshuya to irazola
replace ubigeo1993 = 250302 if ubigeohost == 250305 & _merge == 1 // von humbolt to irazola

* Check: list any unmatched 2014 district that still has no 1993 code.
tab ubigeohost if ubigeo1993 == . & _merge == 1

* Review of doubtful links: flag marks linked pairs (_merge == 3) whose match
* score is below 0.7 and whose district names differ (dd == 0).
* For each flagged 2014 code below, either ubigeo1993 is overwritten by hand
* and the flag cleared, or only the flag is cleared, which leaves the linked
* 1993 code in place.
gen flag = (score < 0.7 & _merge == 3 & dd == 0)
tab ubigeohost if flag == 1

replace ubigeo1993 = 030601 if ubigeohost == 030608 & flag == 1 // NOTE(review): original comment "curimana to p abad" matches the 250303 override earlier in the file, but these codes are in dept 03 -- confirm districts
replace flag = 0 if ubigeohost == 030608 & flag == 1

replace ubigeo1993 = 030604 if ubigeohost == 030609 & flag == 1 // rocchacc to huaccana
replace flag = 0 if ubigeohost == 030609 & flag == 1

replace ubigeo1993 = 030604 if ubigeohost == 030610 & flag == 1 // porvenir to huaccana
replace flag = 0 if ubigeohost == 030610 & flag == 1

replace ubigeo1993 = 080907 if ubigeohost == 080910 & flag == 1 // pichari to kimbiri
replace flag = 0 if ubigeohost == 080910 & flag == 1

replace ubigeo1993 = 080909 if ubigeohost == 080911 & flag == 1 // inkawasi to vilcabamba
replace flag = 0 if ubigeohost == 080911 & flag == 1

replace ubigeo1993 = 080909 if ubigeohost == 080912 & flag == 1 // villa virgen to vilcabamba
replace flag = 0 if ubigeohost == 080912 & flag == 1

replace ubigeo1993 = 080907 if ubigeohost == 080913 & flag == 1 // villa kintiarina to kimbiri
replace flag = 0 if ubigeohost == 080913 & flag == 1

replace ubigeo1993 = 090706 if ubigeohost == 090719 & flag == 1 // quichuas to colcabamba
replace flag = 0 if ubigeohost == 090719 & flag == 1

replace ubigeo1993 = 090718 if ubigeohost == 090721 & flag == 1 // NOTE(review): original comment "quichuas to colcabamba" repeats the previous override although both codes differ -- confirm districts
replace flag = 0 if ubigeohost == 090721 & flag == 1

replace ubigeo1993 = 090709 if ubigeohost == 090722 & flag == 1 // pichos to huaribamba
replace flag = 0 if ubigeohost == 090722 & flag == 1

replace ubigeo1993 = 100103 if ubigeohost == 100113 & flag == 1 // pillao to chinchao
replace flag = 0 if ubigeohost == 100113 & flag == 1

replace ubigeo1993 = 100102 if ubigeohost == 100111 & flag == 1 // pillco arca to amarilis
replace flag = 0 if ubigeohost == 100111 & flag == 1

replace ubigeo1993 = 100106 if ubigeohost == 100112 & flag == 1 // yauco to quisqui
replace flag = 0 if ubigeohost == 100112 & flag == 1

replace ubigeo1993 = 100604 if ubigeohost == 100607 & flag == 1 // pucayacu to crespo y castillo
replace flag = 0 if ubigeohost == 100607 & flag == 1

replace ubigeo1993 = 100601 if ubigeohost == 100608 & flag == 1 // castillo grande to rupa-rupa
replace flag = 0 if ubigeohost == 100608 & flag == 1

* Flag cleared without changing ubigeo1993: the linked 1993 code is kept.
replace flag = 0 if ubigeohost == 120908 & flag == 1 // tres de diciembre
replace flag = 0 if ubigeohost == 150121 & flag == 1 // pueblo libre to magdalena vieja

replace ubigeo1993 = 160101 if ubigeohost == 160112 & flag == 1  // belen to iquitos
replace flag = 0 if ubigeohost == 160112 & flag == 1

replace ubigeo1993 = 160101 if ubigeohost == 160113 & flag == 1 // san juan bautista to iquitos
replace flag = 0 if ubigeohost == 160113 & flag == 1

replace ubigeo1993 = 160109 if ubigeohost == 160114 & flag == 1 // tentiente.. to putumayo
replace flag = 0 if ubigeohost == 160114 & flag == 1

replace ubigeo1993 = 200101 if ubigeohost == 200115 & flag == 1 // 26 de cot to piura
replace flag = 0 if ubigeohost == 200115 & flag == 1

replace ubigeo1993 = 210504 if ubigeohost == 210505 & flag == 1 // NOTE(review): original comment "tentiente.. to putumayo" repeats the 160114 override, but these codes are in dept 21 -- confirm districts
replace flag = 0 if ubigeohost == 210505 & flag == 1

* Flag cleared without changing ubigeo1993: the linked 1993 code is kept.
replace flag = 0 if ubigeohost == 230402 & flag == 1 // albarracin to chucatami

replace ubigeo1993 = 250101 if ubigeohost == 250107 & flag == 1 // manantay to calleria
replace flag = 0 if ubigeohost == 250107 & flag == 1

replace ubigeo1993 = 250102 if ubigeohost == 250106 & flag == 1 // n requena to campo verde
replace flag = 0 if ubigeohost == 250106 & flag == 1

* Check: any code listed here is still flagged and has not been reviewed.
tab ubigeohost if flag == 1


* Reduce the data to the two code columns, remove exact duplicate pairs and
* write the crosswalk next to the input files (overwriting an earlier copy).
keep ubigeo1993 ubigeo2014
duplicates drop

save "`ubigeo'/ubigeo2014to1993", replace
